## Date Site N1 N1Error Filter Well HFGN1
## 3930 2021-02-20 Madison NA NA 2 1 253368
## 3931 2021-02-20 Madison NA NA 2 2 252801
## 3932 2021-02-20 Madison NA NA 2 3 173262
## 3933 2021-02-20 Madison NA NA 3 1 167274
## 3934 2021-02-20 Madison NA NA 3 2 126110
## 3935 2021-02-20 Madison NA NA 3 3 120750
The LIMS N1 values agree with the HFG data, implying that using them to find an underlying trend in the high-frequency data should be faithful.
# Restrict to a window around the HFG sampling period (±14 days) so the
# LIMS and high-frequency series can be compared on common ground.
CompFullDF <- filter(
  FullDF,
  Date > StartHFG - 14,
  Date < EndHFG + 14
)
# (A HFGN1 < 1e7 outlier cut was considered here but is not applied.)
# Split out the rows with a usable LIMS N1 and a usable HFG N1, respectively.
CompN1DF <- filter(CompFullDF, !is.na(N1))
CompHFGDF <- filter(CompFullDF, !is.na(HFGN1))
# Overlay the high-frequency replicate points (colored by Filter) on the
# LIMS trend line, one panel per site, log y scale for the wide dynamic range.
CompPlot <- ggplot(mapping = aes(x = Date)) +
  geom_point(data = CompHFGDF, aes(y = HFGN1, color = Filter), size = .1) +
  geom_line(data = CompN1DF, aes(y = N1, color = "Lims Data"), size = .5) +
  scale_y_log10() +
  facet_wrap(~Site) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
ggplotly(CompPlot)
We apply the same smoothing used in the main analysis to the N1 data.
# Fit a robust loess smooth to the LIMS N1 series for a single site.
#
# Args:
#   SmoothedSite: site name to subset on.
#   SourceDF: data frame with Site/Date/N1 columns. Defaults to the global
#     FullDF so existing callers are unchanged, but can now be supplied
#     explicitly for testing or alternative datasets.
#
# Returns: the filtered Site/Date/N1 rows with a loessN1 column of fitted
#   values appended.
GroupLoess <- function(SmoothedSite, SourceDF = FullDF) {
  FilteredDF <- SourceDF %>%
    filter(Site == SmoothedSite) %>%
    select(Site, Date, N1)
  # NOTE(review): span = .4 is what actually runs; an earlier comment
  # claimed .2 "seems to give the best result" — neither was rigorously
  # chosen. Two robustifying iterations damp the influence of outliers.
  FilteredDF$loessN1 <- loessFit(
    y = FilteredDF$N1,
    x = FilteredDF$Date,
    span = .4,
    iterations = 2
  )$fitted
  FilteredDF
}
# Smooth every HFG site, stack the per-site results, then attach the fitted
# trend to the high-frequency observations by date and site.
FullLoessData <- HFGLocations %>%
  lapply(GroupLoess) %>%
  bind_rows() %>%
  distinct()
FullDF2 <- HFGDF %>%
  inner_join(FullLoessData, by = c("Date", "Site")) %>%
  filter(!is.na(loessN1)) %>%
  mutate(Filter = as.factor(Filter))
# Replicate points against the fitted loess trend, per site; one extreme
# HFG reading (> 4e7) is dropped so it does not stretch the log axis.
CompPlot2 <- FullDF2 %>%
  filter(HFGN1 < 4e7) %>%
  ggplot(aes(x = Date)) +
  geom_point(aes(y = HFGN1, color = Filter), size = .1) +
  geom_line(aes(y = loessN1), size = 1) +
  scale_y_log10() +
  facet_wrap(~Site) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
ggplotly(CompPlot2)
Given the small amount of case data in the region, the smoothing seems to generalize reasonably.
# Window used for the shape comparison against reported cases.
CompFullDF <- FullDF2 %>%
  filter(
    Date > StartHFG - 21,
    Date < EndHFG + 14,
    HFGN1 < 1e7
  )
CompN1DF <- CompFullDF %>%
  filter(!is.na(loessN1))
# -999 is the sentinel for a missing case count; recode it to NA, then drop.
CompCaseHFGDF <- HFGCaseDF %>%
  mutate(ReportedCases = ifelse(ReportedCases == -999, NA, ReportedCases)) %>%
  filter(!is.na(ReportedCases))
# Rough shape comparison: the trend is rescaled by 1e4 so both series can
# share one y axis (an arbitrary factor chosen for visual overlap only).
CompPlot3 <- ggplot(mapping = aes(x = Date)) +
  geom_point(
    data = CompCaseHFGDF,
    aes(y = ReportedCases, color = "ReportedCases"),
    size = 1
  ) +
  geom_line(
    data = CompN1DF,
    aes(y = loessN1 / 1e4, color = "Lims Data"),
    size = 1
  ) +
  facet_wrap(~Site) +
  ggtitle("Simple shape analysis") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
CompPlot3
Once we remove the trend we have a dataset of errors. The resulting error is normally distributed, and its relationship with the trend is minimal.
# Detrend: the error of each replicate measurement around the loess fit.
# NOTE(review): log(HFGN1 - loessN1) is NaN whenever a point falls BELOW
# the trend, so the !is.na() filter silently discards every such row and
# the retained "errors" are one-sided. Confirm whether a log-ratio,
# log(HFGN1) - log(loessN1), was intended instead.
CompFullDF <- FullDF2%>%
filter(HFGN1 < 1e7)%>% # drop the extreme HFG outliers, as above
mutate(TrendError = log(HFGN1 - loessN1))%>%
filter(!is.na(TrendError)) # drops NAs and the NaNs produced by log()
# Distribution of the detrended errors, per site.
ggplot(CompFullDF) +
  geom_histogram(aes(x = TrendError), size = 1) +
  facet_wrap(~Site)
# Error against the fitted trend level (log x to spread the range).
ggplot(CompFullDF) +
  geom_point(aes(x = loessN1, y = TrendError), size = 1) +
  facet_wrap(~Site) +
  scale_x_log10() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# Error over time.
ggplot(CompFullDF) +
  geom_point(aes(x = Date, y = TrendError), size = 1) +
  facet_wrap(~Site)
We can control for filter-to-filter differences in variance by taking the variance of the errors for each day. The resulting data is log-normal and does not scale significantly with N1.
# Per-day technical variance across the (up to three) filter replicates.
CompFullDF2 <- CompFullDF %>%
  group_by(Site, Date) %>%
  summarize(
    TechnicalVariance = var(TrendError),
    LogN1 = mean(log(N1)),
    N = n()
  ) %>%
  filter(N != 1) # a single replicate has no defined variance
# Shape of the daily variance. Surprisingly log-normal even though the
# errors were already taken on the log scale.
CompFullDF2 %>%
  ggplot() +
  aes(x = log(TechnicalVariance)) +
  geom_histogram(bins = 15) +
  facet_wrap(~Site)
# Daily variance against the mean log N1 level.
CompFullDF2 %>%
  ggplot() +
  aes(y = log(TechnicalVariance)) +
  geom_point(aes(x = LogN1)) +
  facet_wrap(~Site)
From this we can estimate the variance to get a handle on the day-to-day fluctuations. We can either take the variance of all the errors or take the (weighted) mean of the daily sample variances. Both give similar answers; which is better depends on what assumptions we want to make about the errors.
# Two pooled estimates of the replicate (technical) variance.
VarianceError <- var(CompFullDF$TrendError,na.rm = TRUE) # calculate variance of all the errors.
#Assumes same mean for all errors. This should be true as the value is generated
#by subtracting the mean from the original data.
MeanVariance <- weighted.mean(CompFullDF2$TechnicalVariance, CompFullDF2$N,na.rm = TRUE) #Weighted Mean
#of the variance. Does not assume each sample mean is equal but gives a larger
#variance
# NOTE(review): the comment above claims the weighted mean gives a LARGER
# variance, but the printed values below show the opposite (1.0365 < 2.141)
# — reconcile. Also, weighting by N rather than N - 1 departs slightly from
# the usual pooled-variance formula; confirm that is intentional.
print(paste("Variance of Trend Errors:", round(VarianceError,4)))
## [1] "Variance of Trend Errors: 2.141"
print(paste("weighted mean of variances", round(MeanVariance,4)))
## [1] "weighted mean of variances 1.0365"